In [1]:
# 1 select pairs for trading
# 2 fit marginal distribution for returns for the securities chosen 
# 3 make marginal returns unniformly distributed using the probability integral transform
# 4 fit copula to transformed returns 
# 5 use fitted copula to calculate conditional probabilities
# 6 enter and exit long / short positions when conditional probabilities fall outside confidence bands

Data Grab¶

In [1]:
import pandas as pd
import pandas_datareader.data as web
import seaborn
from statsmodels.tsa.stattools import coint, adfuller
import numpy as np
import datetime as dt
import sqlite3
import time
import config 
import statsmodels.tsa.stattools as ts
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
from copula import *
import plotly.express as px
import ndtest                      # bivariate Kolmogorov-Smirnov
import warnings
warnings.filterwarnings("ignore")


class Trading:
    
    def __init__(self, condition, start, end):
        if condition == 1:
            self.period1 = start
            self.period2 = end
        else:
            self.period1 = int(time.mktime((dt.datetime.now() - dt.timedelta(365*35)).timetuple()))
            self.period2 = int(time.mktime(dt.datetime.now().timetuple()))
        
        self.fred_start = str(dt.datetime.now() - dt.timedelta(365*35)).split()[0]
        self.fred_end = dt.datetime.now()
        self.interval = '1d'
        self.ticker_list = ["^GSPC","SPXL","XLK","XLU","XLI","XLY","XLP","XLB","XLM","XLV","XLE",
                    "AGG","JNK","LQD","CWB","BKLN","TIP","TLT","MUB","MBB","XLCPR","XLRE","USO",
                    "GDX","GDXJ","QQQ", "KBE","KRE","XSB","XME","XHB","KCE", "XRT","XOP","XES",
                    "XSW","XAR","XITK","XBI","KIE","XHE","XHS","DIA","SPAB","SPSB","SPIB","SHY"
                    "SPLB","SPBO","SPTS","SPTI","SPTI","SPTL","SPMB","SPHY","SPIP", "HYG", "IEF", 
                           "TMF","TYD","TYO"] 

    
    def yahoo_universe(self):
        web3 = []
        failed = []
        for security in self.ticker_list:
            try:                                                    
                interval = '1d' 
                query_string = f'https://query1.finance.yahoo.com/v7/finance/download/{security}?period1={self.period1}&period2={self.period2}&interval={self.interval}&events=history&includeAdjustedClose=true'
                df = pd.read_csv(query_string).set_index("Date")
                df['ticker'] = security
                df.columns = ['Open', 'High', 'Low','Close','Adj Close','Volume','ticker']
                web3.append(df)
            except:
                failed.append(security)                                                
                pass
            final = pd.concat(web3)
            df_final = final.reset_index()
        return df_final, web3, failed

    
    def trading_sql(self, data_file):
        try:
            conn = sqlite3.connect('tradable_universe') 
            data_file.to_sql("consensusinvestable", conn, if_exists='replace', index = False)
            conn.commit()
            conn.close()
            print("Updating the sql table was a success")
        except:
            print("Was unsuccessful in uploading the dataframe into the datatable. ")
    
    
    def to_trading_sql(self, data):
        
        """
        Inputs: dataframe you want to upload
        Outputs: saves SQL table to the investable database 
        """
        
        try:
            connection = sqlite3.connect(config.db_file)
            data.to_sql("consensusinvestable", connection, if_exists='replace', index=False)
            connection.commit()
            connection.close()
            print("Uploading dataframe to database worked successfully.")
        except:
            print("Was unable to save the dataframe into the investable datatable.")

    
    def load_sql(self):
        try:
            conn = sqlite3.connect('tradable_universe') 
            df = pd.read_sql_query("SELECT * from consensusinvestable", conn)
            conn.commit()
            conn.close()
        except:
            print("Could not retrieve the datatable. ")
        return df
    
    
    def ticker_filter(self, ticker, final):
        return final[final['ticker']==f"{ticker}"]
    
    
    def dataframe(self, tickers, final, start, end):
        
        """
        Inputs: The tickers you want to filter by and a raw unfiltered data query from trading database. 
        Outputs: A dataframe with the columns being the names of the selected tickers and the values being 
        the adjusted adjusted close prices. 
        """
        
        new_df = []
        for ticker in tickers:
            new = final[final['ticker']==f"{ticker}"]
            new_df.append(new)
        main = pd.concat(new_df)
        df = main.pivot_table(index = main.index, columns = "ticker", values = 'Adj Close')
        return df[start:end]

Instance¶

In [2]:
main = Trading(0, "2018-01-01", "2020-01-01")   # set first input to one if you want exact date inputs

Querying Data and Populating SQL¶

df, df1, df2 = main.yahoo_universe() ### dont have to run this everytime main.trading_sql(df) # placing the data into my sql datatable => consensusinvestable

Loading SQL to See Data¶

In [3]:
final = main.load_sql().set_index("Date")

tickers = ["^GSPC","SPXL","XLK","XLU","XLI","XLY","XLP","XLB","XLV","XLE",
                    "AGG","JNK","LQD","CWB","BKLN","TIP","TLT","MUB","MBB","USO",
                    "GDX","GDXJ","QQQ", "KBE","KRE","DIA","SPAB","SPSB","SPIB","SHY"
                    "SPLB","SPBO","SPTS","SPTI","SPTL","SPMB","SPHY","SPIP", "HYG", "IEF", 
          "TMF","TYD","TYO"]

start = '2000-01-01'
end = '2023-01-18'
df = main.dataframe(tickers, final, start, end)
df = df.dropna()
In [4]:
df
Out[4]:
ticker AGG BKLN CWB DIA GDX GDXJ HYG IEF JNK KBE ... USO XLB XLE XLI XLK XLP XLU XLV XLY ^GSPC
Date
2012-06-19 85.995331 15.456649 23.818144 101.705917 43.258907 72.617805 51.459663 89.747704 63.863827 17.475275 ... 253.440002 28.407240 44.965324 28.944820 24.637094 25.962149 25.844496 31.679304 38.621426 1357.979980
2012-06-20 85.863953 15.475798 23.792805 101.626503 42.886936 71.897118 51.659920 89.564964 64.076920 17.523483 ... 243.919998 28.246611 44.788113 28.789946 24.679869 25.826250 25.592970 31.595167 38.577660 1355.689941
2012-06-21 85.979904 15.418324 23.551884 99.625084 40.664108 66.337532 51.236561 89.755989 63.716236 17.113712 ... 235.679993 27.322996 42.954594 28.227514 24.098166 25.524277 25.327467 31.157635 37.737495 1325.510010
2012-06-22 85.856209 15.463034 23.646980 100.252518 40.446358 66.783669 51.585541 89.365616 63.994984 17.346724 ... 240.800003 27.467567 43.240864 28.325333 24.363346 25.599773 25.334442 31.485794 37.903782 1335.020020
2012-06-25 86.126801 15.456649 23.469479 99.085045 40.972576 66.165939 51.505474 89.855736 63.962147 16.953018 ... 238.399994 27.122211 42.272999 27.803654 23.867186 25.441238 25.285536 31.098736 37.317425 1313.719971
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2023-01-11 99.326591 20.818163 66.197830 338.916870 31.570000 39.099998 75.842575 98.429214 92.903275 46.700001 ... 68.050003 82.970001 88.139999 101.980003 129.160004 74.980003 72.080002 135.279999 138.169998 3969.610107
2023-01-12 100.044830 20.887327 66.647408 341.151306 32.180000 39.900002 76.270508 99.307335 93.400719 47.150002 ... 68.610001 83.279999 89.820000 102.580002 130.119995 74.389999 71.589996 134.850006 138.399994 3983.169922
2023-01-13 99.675728 20.877447 67.007065 342.188751 32.650002 40.400002 76.270508 98.778465 93.480316 47.270000 ... 70.050003 83.790001 89.949997 102.459999 130.490005 74.739998 71.330002 135.449997 139.699997 3999.090088
2023-01-17 99.496170 20.897207 67.216873 338.318359 31.520000 39.099998 76.011757 98.469131 93.211693 47.090000 ... 70.860001 82.949997 90.139999 101.589996 131.080002 74.790001 71.230003 134.820007 139.800003 3990.969971
2023-01-18 100.483757 20.887327 67.017059 332.143707 31.209999 38.560001 76.101326 99.806267 93.301231 45.700001 ... 69.510002 81.779999 88.489998 99.680000 129.389999 72.750000 69.519997 132.919998 137.979996 3928.860107

2663 rows × 41 columns

Kendall Correlation¶

In [5]:
### this kendall correlation coefficient is helpful in fitting joint probability distributions to copulas
# could also make this for one year preceding training and 1 year after testing
# base longs off Composite Macro Indicator
def kendall(df, train_start, train_end, test_start, test_end):
    """
    Inputs: dataframe with all the raw price data (df.set_index("Date")), train_start is the date in string form
    of the training dataset (as is train_end, test_start, and test end)
    Outputs: training data kendall tau's
    """
    returns = np.log(df).diff().dropna()
    #prices_train = df[0:int(df.shape[0]*.7)]
    prices_train = df[train_start:train_end]
    #prices_test = df[-(df.shape[0] - int(df.shape[0]*.7)):]
    prices_test = df[test_start: test_end]
    #returns_train = returns[0:int(returns.shape[0]*.7)]
    returns_train = returns[train_start:train_end]
    #returns_test = returns[-(returns.shape[0] - int(returns.shape[0]*.7)):]
    returns_test = returns[test_start: test_end]

    results = pd.DataFrame(columns=['tau'])

    for s1 in returns_train.columns:
        for s2 in returns_train.columns:
            if (s1!=s2) and (f'{s2}-{s1}' not in results.index):
                results.loc[f'{s1}-{s2}'] = stats.kendalltau(returns_train[s1], returns_train[s2])[0]

    return results, returns_train, returns_test, prices_train, prices_test

#results = kendall(df, "2015-01-01", "2015-12-31", "2016-01-01","2016-12-31")[0]
#returns_train = kendall(df)[1]
#returns_test = kendall(df)[2]
#prices_train = kendall(df)[3]
#prices_test = kendall(df)[4]
results,returns_train,returns_test, prices_train, prices_test   = kendall(df, "2015-01-01", "2015-12-31", "2016-01-01","2016-12-31")
results.sort_values('tau', ascending = False).head(50) # want to see the returns that are most correlated in terms of kendall's tau
Out[5]:
tau
TLT-TMF 0.976237
SPXL-^GSPC 0.968444
SPTL-TLT 0.941973
SPTL-TMF 0.938861
KBE-KRE 0.907918
DIA-SPXL 0.878075
DIA-^GSPC 0.876621
SPIP-TIP 0.846381
QQQ-XLK 0.824556
AGG-IEF 0.814838
GDX-GDXJ 0.812161
HYG-JNK 0.810866
IEF-SPAB 0.798532
XLI-^GSPC 0.793189
AGG-SPAB 0.791474
SPXL-XLI 0.790596
DIA-XLI 0.788003
QQQ-SPXL 0.786189
IEF-TMF 0.784987
IEF-SPTL 0.783918
IEF-TLT 0.783791
QQQ-^GSPC 0.778790
SPXL-XLK 0.758850
XLK-^GSPC 0.755245
SPXL-XLY 0.743063
XLY-^GSPC 0.741356
SPAB-SPTL 0.740250
SPAB-TLT 0.737180
SPAB-TMF 0.736006
AGG-LQD 0.733543
AGG-TLT 0.733124
AGG-TMF 0.732804
AGG-SPTL 0.732618
DIA-XLK 0.730392
IEF-LQD 0.728052
DIA-QQQ 0.727756
CWB-^GSPC 0.727460
DIA-XLY 0.726494
CWB-SPXL 0.720881
LQD-TLT 0.712370
LQD-SPAB 0.711305
LQD-SPTL 0.710473
LQD-TMF 0.709965
IEF-SPTI 0.692326
CWB-QQQ 0.687922
QQQ-XLY 0.687347
SPXL-XLV 0.681591
XLV-^GSPC 0.677481
CWB-DIA 0.674448
IEF-MBB 0.674301

Finding Good Candidates Through Correlation (Tau)¶

In [6]:
def parse_pair(pair):
    s1 = pair[:pair.find('-')]
    s2 = pair[pair.find('-')+1:]
    return s1,s2

def show_pairs():
    selected_stocks = []     # creating empty lists for the stocks
    selected_pairs = []      # creating an empty list for the identified higher correlated pairs. 

    for pair in results.sort_values(by='tau', ascending=False).index:
        s1,s2 = parse_pair(pair)         # selecting the pairs
        if (s1 not in selected_stocks) and (s2 not in selected_stocks):
            selected_stocks.append(s1)   # adding the first pair 
            selected_stocks.append(s2)   # adding the second pair 
            selected_pairs.append(pair)  # adding pairs to selected pairs list => stops after 20

        if len(selected_pairs) == 50:    # selecting 30 pairs with the highest Kendall's rank correlation
            break

    return selected_stocks, selected_pairs    
selected_stocks = show_pairs()[0]
selected_pairs = show_pairs()[1]
selected_pairs
Out[6]:
['TLT-TMF',
 'SPXL-^GSPC',
 'KBE-KRE',
 'SPIP-TIP',
 'QQQ-XLK',
 'AGG-IEF',
 'GDX-GDXJ',
 'HYG-JNK',
 'DIA-XLI',
 'SPAB-SPTL',
 'LQD-SPIB',
 'CWB-XLY',
 'MBB-SPTI',
 'XLB-XLE',
 'XLP-XLV',
 'MUB-TYD',
 'SPBO-SPTS',
 'BKLN-USO',
 'SPMB-SPSB',
 'SPHY-XLU']

Marginal Distributions¶

In [7]:
def marginals(selcted_stocks):

    marginals_df = pd.DataFrame(index=selected_stocks, columns=['Distribution', 'AIC', 'BIC', 'KS_pvalue'])

    for stock in selected_stocks:
        data = returns_train[stock]
        dists = ['Normal', "Student's t", 'Logistic', 'Extreme']
        best_aic = np.inf           # selection process is based on the best AIC
        for dist,name in zip([stats.norm, stats.t, stats.genlogistic, stats.genextreme], dists):
            params = dist.fit(data)
            dist_fit = dist(*params)
            log_like = np.log(dist_fit.pdf(data)).sum()
            aic = 2*len(params) - 2 * log_like
            if aic<best_aic:
                best_dist = name
                best_aic = aic      # selection process is based on the best AIC
                best_bic = len(params) * np.log(len(data)) - 2 * log_like
                ks_pval = stats.kstest(data, dist_fit.cdf, N=100)[1]
        marginals_df.loc[stock] = [best_dist, best_aic, best_bic, ks_pval]

    ### P-values were quite large which is a good thing meaning we can't reject the null hypothesis that the distributions are the same
    ### Low KS-Test P-values means that the parametric distributions are not a good fit for the data
    return marginals_df

### creating a new dataframe of securities that fit their respective distributions
marginals_df = marginals(selected_stocks)
new_marg = marginals_df[marginals_df['KS_pvalue'] > 0.10]
df = df[new_marg.index]
marginals_df
Out[7]:
Distribution AIC BIC KS_pvalue
TLT Normal -1610.760654 -1603.701795 0.586381
TMF Normal -1062.530229 -1055.471371 0.419009
SPXL Student's t -1077.419825 -1066.831537 0.679778
^GSPC Student's t -1627.912715 -1617.324427 0.674828
KBE Logistic -1473.359466 -1462.771179 0.999844
KRE Student's t -1458.679084 -1448.090797 0.968607
SPIP Normal -2078.972581 -2071.913723 0.750715
TIP Student's t -2130.657051 -2120.068764 0.93879
QQQ Student's t -1564.853114 -1554.264826 0.921391
XLK Student's t -1562.898441 -1552.310154 0.972787
AGG Normal -2323.835803 -2316.776944 0.666722
IEF Normal -2044.583808 -2037.524949 0.541826
GDX Student's t -1074.754705 -1064.166417 0.820958
GDXJ Normal -1051.351878 -1044.293019 0.908549
HYG Student's t -2053.086546 -2042.498258 0.965974
JNK Logistic -2094.320008 -2083.731721 0.688192
DIA Logistic -1626.772388 -1616.184101 0.754299
XLI Logistic -1600.002338 -1589.414051 0.84609
SPAB Normal -2320.441858 -2313.383 0.291949
SPTL Normal -1644.395013 -1637.336155 0.651338
LQD Logistic -2096.407575 -2085.819287 0.764367
SPIB Logistic -2384.139415 -2373.551128 0.413805
CWB Logistic -1825.223375 -1814.635087 0.320368
XLY Logistic -1599.074169 -1588.485882 0.965182
MBB Logistic -2502.507265 -2491.918977 0.932075
SPTI Logistic -2466.017288 -2455.429001 0.361394
XLB Logistic -1530.76709 -1520.178803 0.983511
XLE Logistic -1381.761192 -1371.172905 0.748201
XLP Logistic -1678.533174 -1667.944887 0.850258
XLV Logistic -1541.028177 -1530.439889 0.996379
MUB Logistic -2470.483304 -2459.895017 0.943945
TYD Logistic -1474.445374 -1463.857087 0.071785
SPBO Logistic -2089.800962 -2079.212675 0.728049
SPTS Student's t -2548.103569 -2537.515281 0.142942
BKLN Logistic -2403.252032 -2392.663744 0.149573
USO Logistic -1111.298255 -1100.709968 0.826598
SPMB Normal -2334.849811 -2327.790953 0.529391
SPSB Normal -2845.80951 -2838.750652 0.077819
SPHY Logistic -2016.368063 -2005.779776 0.58341
XLU Logistic -1579.001328 -1568.413041 0.975958
In [8]:
def copulas(selected_pairs, returns_train):
    copulas_df = pd.DataFrame(index=selected_pairs, columns=['copula', 'parameter', 'aic', 'bic', 'KS_pvalue'])

    for pair in selected_pairs:
        s1,s2 = parse_pair(pair)
        params_s1 = stats.t.fit(returns_train[s1])   # fit marginals
        dist_s1 = stats.t(*params_s1)
        params_s2 = stats.t.fit(returns_train[s2])   # fit marginals
        dist_s2 = stats.t(*params_s2)
        u = dist_s1.cdf(returns_train[s1])           # probability integral transform
        v = dist_s2.cdf(returns_train[s2])           # probability integral transform

        best_aic = np.inf
        # GaussianCopula()
        for copula in [ClaytonCopula(), GumbelCopula(), FrankCopula(), JoeCopula()]:
            copula.fit(u,v)
            L = copula.log_likelihood(u,v)
            aic = 2 * copula.num_params - 2 * L
            if aic < best_aic:
                best_aic = aic
                best_bic = copula.num_params * np.log(len(u)) - 2 * L
                best_copula = copula.name

                # calculate KS-pvalue
                smp = copula.sample(size=len(u)) # generate sample from fit copula
                s_u = smp[:,0]
                s_v = smp[:,1]
                ks_pval = ndtest.ks2d2s(u,v,s_u,s_v)

                if isinstance(copula, ArchimedeanCopula):
                    best_param = copula.alpha
                else:
                    best_param = copula.rho

        copulas_df.loc[pair] = [best_copula, best_param, best_aic, best_bic, ks_pval]

    return copulas_df

# a KS value below 0.05 or close is bad as it says these distributions are very close to one another
copulas_df = copulas(selected_pairs, returns_train)
In [9]:
copulas_df
Out[9]:
copula parameter aic bic KS_pvalue
TLT-TMF Gumbel 19.999993 -1470.560308 -1467.030879 0.841738
SPXL-^GSPC Gumbel 19.999993 -1431.466213 -1427.936784 0.33278
KBE-KRE Gumbel 9.287345 -907.798652 -904.269223 0.254518
SPIP-TIP Frank 19.999994 -621.388576 -617.859147 0.178558
QQQ-XLK Gumbel 6.008995 -693.107085 -689.577656 0.49749
AGG-IEF Gumbel 5.387482 -599.444074 -595.914645 0.353594
GDX-GDXJ Frank 18.964843 -564.082877 -560.553448 0.301371
HYG-JNK Gumbel 4.75396 -575.195083 -571.665654 0.480766
DIA-XLI Gumbel 4.265284 -526.302833 -522.773404 0.659108
SPAB-SPTL Frank 13.023785 -412.03326 -408.503831 0.123467
LQD-SPIB Gumbel 2.692679 -294.567726 -291.038297 0.230292
CWB-XLY Gumbel 2.559714 -277.171703 -273.642274 0.204521
MBB-SPTI Frank 7.72779 -231.767919 -228.23849 0.163916
XLB-XLE Gumbel 2.10329 -209.080794 -205.551364 0.560357
XLP-XLV Gumbel 2.156211 -209.652546 -206.123117 0.496145
MUB-TYD Frank 4.324432 -98.648962 -95.119533 0.132821
SPBO-SPTS Gumbel 1.413525 -45.293663 -41.764234 0.398201
BKLN-USO Frank 2.360884 -33.521638 -29.992208 0.092707
SPMB-SPSB Clayton 0.432169 -13.563769 -10.03434 0.079563
SPHY-XLU Gumbel 1.109499 -3.798345 -0.268915 0.384843
In [10]:
new_copula_df = copulas_df[copulas_df['KS_pvalue']>0.1].sort_values("KS_pvalue",ascending=False)
selected_pairs = new_copula_df.index  # new copulas with KS p_values greater than 0.1 (could alter to 0.05)
selected_pairs
Out[10]:
Index(['TLT-TMF', 'DIA-XLI', 'XLB-XLE', 'QQQ-XLK', 'XLP-XLV', 'HYG-JNK',
       'SPBO-SPTS', 'SPHY-XLU', 'AGG-IEF', 'SPXL-^GSPC', 'GDX-GDXJ', 'KBE-KRE',
       'LQD-SPIB', 'CWB-XLY', 'SPIP-TIP', 'MBB-SPTI', 'MUB-TYD', 'SPAB-SPTL'],
      dtype='object')
In [11]:
algo_returns = {}
cl = 0.85                                       # confidence bands

history = []

for pair in selected_pairs:
    s1,s2 = parse_pair(pair)

    params_s1 = stats.t.fit(returns_train[s1])  # fit marginals
    dist_s1 = stats.t(*params_s1)          
    params_s2 = stats.t.fit(returns_train[s2])  # fit marginals
    dist_s2 = stats.t(*params_s2)

    u = dist_s1.cdf(returns_train[s1])          # transform marginals
    v = dist_s2.cdf(returns_train[s2])          # transform marginals
        
    best_aic = np.inf                           # fit copula
    best_copula = None                          # could incorporate a mixture of copulas explaining each relationship

    
    ### could take out the gaussian copula because the problems with the 08 GFC
        # GaussianCopula()
    copulas = [ClaytonCopula(), GumbelCopula(), FrankCopula(), JoeCopula()]    # copulas called from copulas.py file 
    for copula in copulas:
        copula.fit(u,v)
        L = copula.log_likelihood(u,v)
        aic = 2 * copula.num_params - 2 * L
        if aic < best_aic:        # choose the copula with the lowest AIC 
            best_aic = aic
            best_copula = copula  # best copula to fit the joint distribution using AIC 
            
    prob_s1 = []   # calculate conditional probabilities
    prob_s2 = []   # calculate conditional probabilities

    for u,v in zip(dist_s1.cdf(returns_test[s1]), dist_s2.cdf(returns_test[s2])):
        prob_s1.append(best_copula.cdf_u_given_v(u,v))
        prob_s2.append(best_copula.cdf_v_given_u(u,v))
        
    probs_trade = pd.DataFrame(np.vstack([prob_s1, prob_s2]).T, index=returns_test.index, columns=[s1, s2])
    history.append(probs_trade)
    positions = pd.DataFrame(index=probs_trade.index, columns=probs_trade.columns)
    long = False
    short = False

    for t in positions.index:    
        # if long position is open
        if long:
            if (probs_trade.loc[t][s1] > 0.5) or (probs_trade.loc[t][s2] < 0.5):
                positions.loc[t] = [0,0]
                long = False
            else:
                positions.loc[t] = [1,-1]

        # if short position is open
        elif short:
            if (probs_trade.loc[t][s1] < 0.5) or (probs_trade.loc[t][s2] > 0.5):
                positions.loc[t] = [0,0]
                short = False
            else:
                positions.loc[t] = [-1,1]

        # if no positions are open
        else:
            if (probs_trade.loc[t][s1] < (1-cl)) and (probs_trade.loc[t][s2] > cl):
                # open long position
                positions.loc[t] = [1,-1]
                long = True
            elif (probs_trade.loc[t][s1] > cl) and (probs_trade.loc[t][s2] < (1-cl)):
                # open short positions
                positions.loc[t] = [-1,1]
                short = True
            else:
                positions.loc[t] = [0,0]
                
    # calculate returns
    algo_ret = (returns_test * positions.shift()).sum(axis=1)
    algo_returns[pair] = algo_ret
In [12]:
for i in history:
    print(i)   # prints the results of the return testing data frame 
                 TLT       TMF
Date                          
2016-01-04  0.000139  0.999840
2016-01-05  0.498514  0.493757
2016-01-06  0.556754  0.478497
2016-01-07  0.454763  0.559304
2016-01-08  0.549488  0.472055
...              ...       ...
2016-12-23  0.722734  0.288077
2016-12-27  0.272835  0.726411
2016-12-28  0.807231  0.211313
2016-12-29  0.192905  0.820350
2016-12-30  0.162074  0.845676

[252 rows x 2 columns]
                 DIA       XLI
Date                          
2016-01-04  0.167538  0.596566
2016-01-05  0.238922  0.808807
2016-01-06  0.357686  0.359901
2016-01-07  0.366034  0.169416
2016-01-08  0.305240  0.524729
...              ...       ...
2016-12-23  0.411516  0.638777
2016-12-27  0.325333  0.728427
2016-12-28  0.781337  0.134598
2016-12-29  0.446698  0.584672
2016-12-30  0.556438  0.429441

[252 rows x 2 columns]
                 XLB       XLE
Date                          
2016-01-04  0.032367  0.905688
2016-01-05  0.354001  0.748423
2016-01-06  0.157770  0.110175
2016-01-07  0.060303  0.380104
2016-01-08  0.321217  0.404465
...              ...       ...
2016-12-23  0.669966  0.429927
2016-12-27  0.758987  0.403492
2016-12-28  0.284033  0.480577
2016-12-29  0.583170  0.478075
2016-12-30  0.250572  0.691505

[252 rows x 2 columns]
                 QQQ       XLK
Date                          
2016-01-04  0.037722  0.882561
2016-01-05  0.587852  0.403870
2016-01-06  0.671561  0.217379
2016-01-07  0.290255  0.355758
2016-01-08  0.365453  0.548820
...              ...       ...
2016-12-23  0.408004  0.624373
2016-12-27  0.724306  0.337771
2016-12-28  0.539670  0.363746
2016-12-29  0.210003  0.806755
2016-12-30  0.161425  0.762378

[252 rows x 2 columns]
                 XLP       XLV
Date                          
2016-01-04  0.270278  0.188756
2016-01-05  0.813373  0.373220
2016-01-06  0.595031  0.245856
2016-01-07  0.344326  0.126649
2016-01-08  0.488121  0.154803
...              ...       ...
2016-12-23  0.311580  0.831490
2016-12-27  0.426498  0.643879
2016-12-28  0.381481  0.379991
2016-12-29  0.812441  0.312016
2016-12-30  0.315767  0.557216

[252 rows x 2 columns]
                 HYG       JNK
Date                          
2016-01-04  0.262892  0.527599
2016-01-05  0.258283  0.825426
2016-01-06  0.815556  0.207479
2016-01-07  0.227218  0.551922
2016-01-08  0.765432  0.161336
...              ...       ...
2016-12-23  0.146533  0.911983
2016-12-27  0.910368  0.072783
2016-12-28  0.415803  0.581778
2016-12-29  0.363736  0.720795
2016-12-30  0.092637  0.939559

[252 rows x 2 columns]
                SPBO      SPTS
Date                          
2016-01-04  0.982201  0.164563
2016-01-05  0.077662  0.483582
2016-01-06  0.832030  0.708076
2016-01-07  0.209025  0.882466
2016-01-08  0.466248  0.741131
...              ...       ...
2016-12-23  0.334045  0.363705
2016-12-27  0.403939  0.563553
2016-12-28  0.865186  0.358865
2016-12-29  0.558391  0.880689
2016-12-30  0.580391  0.767062

[252 rows x 2 columns]
                SPHY       XLU
Date                          
2016-01-04  0.973232  0.284925
2016-01-05  0.053767  0.821957
2016-01-06  0.742877  0.381189
2016-01-07  0.758185  0.209802
2016-01-08  0.501752  0.478717
...              ...       ...
2016-12-23  0.936663  0.380766
2016-12-27  0.782963  0.483526
2016-12-28  0.467642  0.162538
2016-12-29  0.396947  0.921549
2016-12-30  0.805437  0.240099

[252 rows x 2 columns]
                 AGG       IEF
Date                          
2016-01-04  0.000220  0.999940
2016-01-05  0.847211  0.180482
2016-01-06  0.742697  0.370045
2016-01-07  0.013554  0.991153
2016-01-08  0.919934  0.132439
...              ...       ...
2016-12-23  0.254831  0.788551
2016-12-27  0.440686  0.543083
2016-12-28  0.198000  0.871293
2016-12-29  0.978527  0.043157
2016-12-30  0.754276  0.334097

[252 rows x 2 columns]
                SPXL     ^GSPC
Date                          
2016-01-04  0.566210  0.366002
2016-01-05  0.648482  0.374914
2016-01-06  0.327790  0.615203
2016-01-07  0.329700  0.576472
2016-01-08  0.338062  0.622151
...              ...       ...
2016-12-23  0.249566  0.759801
2016-12-27  0.606912  0.410120
2016-12-28  0.688278  0.287210
2016-12-29  0.283817  0.723907
2016-12-30  0.909136  0.087634

[252 rows x 2 columns]
                 GDX      GDXJ
Date                          
2016-01-04  0.446633  0.581536
2016-01-05  0.131133  0.867941
2016-01-06  0.534869  0.469571
2016-01-07  0.958817  0.063141
2016-01-08  0.627413  0.359751
...              ...       ...
2016-12-23  0.588073  0.412453
2016-12-27  0.302559  0.736770
2016-12-28  0.109722  0.903831
2016-12-29  0.864757  0.955641
2016-12-30  0.713513  0.109576

[252 rows x 2 columns]
                 KBE       KRE
Date                          
2016-01-04  0.543101  0.288144
2016-01-05  0.329151  0.684177
2016-01-06  0.133592  0.810731
2016-01-07  0.325813  0.449620
2016-01-08  0.378268  0.482930
...              ...       ...
2016-12-23  0.594045  0.429849
2016-12-27  0.152916  0.875860
2016-12-28  0.714662  0.242976
2016-12-29  0.413232  0.553474
2016-12-30  0.176862  0.846263

[252 rows x 2 columns]
                 LQD      SPIB
Date                          
2016-01-04  0.015712  0.996347
2016-01-05  0.139375  0.932670
2016-01-06  0.744309  0.466668
2016-01-07  0.460971  0.649756
2016-01-08  0.341869  0.771068
...              ...       ...
2016-12-23  0.642674  0.486539
2016-12-27  0.366097  0.566938
2016-12-28  0.436188  0.782848
2016-12-29  0.887569  0.262805
2016-12-30  0.148570  0.940116

[252 rows x 2 columns]
                 CWB       XLY
Date                          
2016-01-04  0.483766  0.121816
2016-01-05  0.860853  0.183776
2016-01-06  0.573868  0.205053
2016-01-07  0.288409  0.152153
2016-01-08  0.360738  0.308401
...              ...       ...
2016-12-23  0.989979  0.027021
2016-12-27  0.974290  0.089531
2016-12-28  0.113829  0.653353
2016-12-29  0.889807  0.157514
2016-12-30  0.482725  0.296585

[252 rows x 2 columns]
                SPIP       TIP
Date                          
2016-01-04  0.413764  0.625344
2016-01-05  0.735042  0.268918
2016-01-06  0.504450  0.517494
2016-01-07  0.630434  0.370276
2016-01-08  0.554878  0.442913
...              ...       ...
2016-12-23  0.969487  0.028385
2016-12-27  0.081850  0.918370
2016-12-28  0.584895  0.423728
2016-12-29  0.161154  0.838859
2016-12-30  0.454774  0.566649

[252 rows x 2 columns]
                 MBB      SPTI
Date                          
2016-01-04  0.977278  0.005634
2016-01-05  0.718009  0.296284
2016-01-06  0.875971  0.581059
2016-01-07  0.505277  0.597364
2016-01-08  0.587366  0.470657
...              ...       ...
2016-12-23  0.506366  0.504158
2016-12-27  0.692244  0.242352
2016-12-28  0.615222  0.670542
2016-12-29  0.940646  0.261214
2016-12-30  0.622969  0.564870

[252 rows x 2 columns]
                 MUB       TYD
Date                          
2016-01-04  0.007194  0.998796
2016-01-05  0.830072  0.235688
2016-01-06  0.980671  0.114181
2016-01-07  0.863759  0.210812
2016-01-08  0.642631  0.383883
...              ...       ...
2016-12-23  0.837998  0.198215
2016-12-27  0.564727  0.340332
2016-12-28  0.933311  0.172574
2016-12-29  0.483015  0.988969
2016-12-30  0.682376  0.077697

[252 rows x 2 columns]
                SPAB      SPTL
Date                          
2016-01-04  0.230168  0.776569
2016-01-05  0.948228  0.049650
2016-01-06  0.707333  0.529760
2016-01-07  0.044088  0.957673
2016-01-08  0.678943  0.340254
...              ...       ...
2016-12-23  0.794868  0.206137
2016-12-27  0.662520  0.332517
2016-12-28  0.225548  0.778883
2016-12-29  0.967456  0.044619
2016-12-30  0.702897  0.295207

[252 rows x 2 columns]
In [13]:
def long_only(history_df, probs):
    new = df[history_df.columns]
    new.columns = new.columns + ["_prices"]
    check = pd.concat([new,history_df],axis=1).dropna()
    check[f'{check.columns[2]} Long'] = np.where(check[check.columns[2]]<= probs, 1, 0)
    check[f'{check.columns[3]} Long'] = np.where(check[check.columns[3]]<= probs, 1, 0)
    return check

long_only(history[6], 0.1)
Out[13]:
SPBO_prices SPTS_prices SPBO SPTS SPBO Long SPTS Long
Date
2016-01-04 24.842628 27.543909 0.982201 0.164563 0 0
2016-01-05 24.667610 27.525602 0.077662 0.483582 1 0
2016-01-06 24.826712 27.580503 0.832030 0.708076 0 0
2016-01-07 24.794901 27.607965 0.209025 0.882466 0 0
2016-01-08 24.802851 27.626270 0.466248 0.741131 0 0
... ... ... ... ... ... ...
2016-12-23 25.778719 27.693068 0.334045 0.363705 0 0
2016-12-27 25.754118 27.693068 0.403939 0.563553 0 0
2016-12-28 25.829756 27.696762 0.865186 0.358865 0 0
2016-12-29 25.903746 27.752234 0.558391 0.880689 0 0
2016-12-30 25.944838 27.779961 0.580391 0.767062 0 0

252 rows × 6 columns

In [14]:
returns = pd.DataFrame.from_dict(algo_returns)
returns = np.exp(returns) - 1                              # convert log-returns to simple returns
total_ret = returns.sum(axis=1) / len(returns.columns) * 2 # double capital (from short positions)
strat = pd.DataFrame(total_ret+1, columns = ['Algo'])
strat['returns'] = np.nancumprod(total_ret+1)
strat
Out[14]:
Algo returns
Date
2016-01-04 1.000000 1.000000
2016-01-05 1.000595 1.000595
2016-01-06 1.002417 1.003013
2016-01-07 0.999864 1.002877
2016-01-08 0.999089 1.001963
... ... ...
2016-12-23 1.000121 1.077592
2016-12-27 0.999687 1.077255
2016-12-28 1.000970 1.078299
2016-12-29 0.998197 1.076355
2016-12-30 1.000170 1.076537

252 rows × 2 columns

In [15]:
benchmark = ["^GSPC"]
final = main.load_sql().set_index("Date")
end = strat.index[-1]
bm = main.dataframe(benchmark, final, "2000-01-01", end).pct_change().dropna()[strat.index[0]:]
bm["Benchmark"] = np.nancumprod(bm+1)
final = pd.concat([strat, bm],axis=1)
copula_strat = final[['returns','Benchmark']].reset_index()
fig = px.line(copula_strat, x="Date", y=copula_strat.columns,
              hover_data={"Date": "|%B %d, %Y"},
              title='Strategy Returns')
fig.update_xaxes(
    dtick="M1",
    tickformat="%b\n%Y")
fig.show()

Individualized¶

In [16]:
ind_strat = []
for i in algo_returns.keys():
    new = pd.DataFrame(algo_returns[i], columns = [i])
    ind_strat.append(new)
final_df = pd.concat(ind_strat, axis=1).fillna(0)
final_df_des = final_df.describe()
cumprod_daily_pct_change = (1 + final_df).cumprod().reset_index()
fig = px.line(cumprod_daily_pct_change, x="Date", y=cumprod_daily_pct_change.columns,
              hover_data={"Date": "|%B %d, %Y"},
              title='Pair Returns')
fig.update_xaxes(
    dtick="M1",
    tickformat="%b\n%Y")
fig.show()
In [17]:
### good idea would be to match this with fundamental momentum strategy (roic)
In [ ]: